; Vector3: X right, Y down, Z forward.
; On the FP stack it looks like {Y X Z} (Y is used in comparisons).
; In memory it looks like {Z X Y}, which saves a displacement byte.
; (u'v) is the dot product: ux*vx + uy*vy + uz*vz.

org 100h ; assume al=0 bx=0 sp=di=-2 si=0100h bp=09??h
  dw 12        ;=0C 00  or al,0

;Set video mode and earth+sky palette
  mov al,13h
  dec di
  dec di       ; initial pixel_adr@di = -4
P:shr cl,1
  add cl,[si]  ; B@cl = 12..20..43,43..12
  int 10h      ; set video mode / palette color: bx=i dh=R ch=G cl=B

  dw 18        ;=12 00  adc al,[bx+si]

  mov al,bl
  cbw
  xor al,ah    ; 0..127,127..0
  mov ch,al
  shr ch,1     ; G@ch = 0..63,63..0
  mov dh,ch    ; R@dh = 0..63,63..0
  mul al       ; ah = 0..16..63,63..16..0
  
  or bl,bl
BIG equ $-1 ;31195
  jns Q
  xchg ax,dx   ; R@dh = 0..63,63..16..0
Q:mov cl,ah    ; B@cl = 0..16..63,63..0

  mov ax,1010h
  inc bx
  jnz P        ;bx=0 cx=0

;Each frame: Generate gem normals to p0..p12=[bp+200h,300h,...].
M:mov dx,0xA000-10-20-20-4
  mov es,dx    ; dx:bx=YX:XX = 0x9fca:0

  pusha  ; adr:   -18 -16 -14 -12 -10  -8  -6  -4  -2
         ; stack:  di  si  bp  sp  bx  dx  cx  ax   0
         ; data:   -4 100 9??  -2  0  9fca T  key
  mov cx,[si]
G:add bp,si    ; i@cx = 12...1; bp points to p[12-i]; carry=0
  pusha

;Dodecahedron planes with unit normals.
  fld1         ; exact would be atan((1+sqrt5)/2)=1.0172rad
;  fild word[bx-6]
;  fidiv word[bx-16]  ;|t=T/256  morphing shape

  fsincos
  fldz               ;|0 0.540 0.841 (exact: 0.526 0.851)
N:test cl,1          ;|a b c
  jnz K
  fchs
K:fstp st3           ;|b c +-a
  loop N  ;cl=0 cf=0 ;|z x y

;Do a bunch of rotations. It doesn't need to be fast.
; z x y --> cx-sy cy+sx z

  mov cl,25
R:fstp st3           ;|x y z

R2:
  fld st1            ;|y x y z                     ;|x sy x cy z
  fild word[bx-6]
  fidiv word[bx-16]  ;|t=T/256
  fsincos            ;|c=cos(t) s=sin(t) y x y z   ;|c s x sy x cy z
  fmulp st4          ;|s y x cy z                  ;|s x sy cx cy z
  fmulp              ;|sy x cy z                   ;|sx sy cx cy z
  cmc
  jc R2
  faddp st3          ;|sy cx cy+sx z
  fsubp              ;|new.z=cx-sy .x=cy+sx .y=z
  loop R

S:fstp dword[bp+si] ;[bp+100]=.z [bp+104]=.x [bp+108]=.y
  sub si,di
  jpo S  

  popa
  loop G
  popa

; the visible pixels are A0000..AF9FF, I want X=0 Y=0 in the center
;Each pixel: cx=T dx:bx=YX:XX(init=9fca:0) di=adr(init=-4)
X:inc dx       ; part of "dx:bx += 0x0000CCCD"
X2:
  stosb

  pusha        ; adr:     -18 -16 -14 -12 -10  -8  -6  -4  -2
  fninit       ; stack:    di  si  bp  sp  bx  dx  cx  ax   0
  mov bx,es    ; s16:  pixadr 100 9??  -2  ..X..Y  T result
  mov di,-4 ;di = address of pushed ax

;Compute ray direction.
  fild word [byte BIG+si-100h]  ; store 30K as a double, read as two floats
  fst qword[bx]     ; t_front@float[bx] = 0, t_back@float[bx+4] = 6.93
  fild word[di+4-9]
  fild word[di+4-8]  ;|y=Y x=X z=BIG

;Intersect the gem.
  call GEM
  popa         ; color -> pushed ax
  mov al,ah

;  mov al,dl    ; test - show only palette

;; Faster, but lower quality: draw each pixel twice.
;  stosb
;  add bx,0xCCCD; dx:bx = YXX += 0000CCCD
;  adc dx,0

  add bx,0xCCCD; dx:bx = YXX += 0000CCCD
  jnc X2
  jnz X        ; do 65536 pixels

  in al,60h
  dec al
  loopnz M        ; T--
;  ret          ; fallthrough

GEM:
;; Faster: intersect the gem only in the center of the screen
;  add dh,dh
;  jo B
;  add dl,dl
;  jo B
  
;Hit the gem.
  xchg ax,cx   ; ax = T
  mov cx,[si]  ; i@dx = 12...1
;Ray-plane intersection.
;Find the front plane with maximum t and back plane with minimum t.
; tf@[bx],    tb@[bx+4]      ray parameter t
; pf@[bx+si], pb@[bx+4+si]   pointer to plane
I:add bp,si    ; bp points to p[i]
  fldlg2             ;|pd=0.301 y x z
  fadd dword[bp+si]  ;|N=pd-(ro'p[i]) y x z  ; ro = 0 0 -1

  push si      ; Dot product:
D:fld dword[bp+si]   ;|p[i].z ...
  fmul st4           ;|rd.z*p[i].z ...
  sub si,di    ; 100 104 108
  jpo D              ;|(rd*p[i]).y .x .z N rd.y .x .z
  pop si
  faddp
  faddp              ;|D=(rd'p[i]) N y x z
  
;If we hit the plane from the front (D<0), update tf. Otherwise update tb.
  push bx
  fst dword[bp+di]; -> p[i].dot_rd
  test [bp+di+2],sp ; sf=1 if we're in front of the plane
  js FRONT
  sub bx,di    ; bx = address of tf?tb
FRONT:         ; D<0:  if tf*D < N { tf=N/D; pf=current; }  maximalize tf
  fld st0      ; D>=0: if tb*D < N { tb=N/D; pb=current; }  minimalize tb
  fmul dword[bx]     ;|(tf?tb)*D D N y x z

;DosBOX-compatible FPU comparison, +3 bytes
;  fcomp st2          ;|D N y x z    ;compatible version
;  fnstsw ax
;  sahf         ; cf = (tf?tb)*D < N
  fcomip st2

  jc NEXT
  fdivr st1          ;|t=N/D N y x z
  fst dword[bx] ; -> tf?tb
  mov [bx+si],bp ; pf?pb = current
NEXT:
  fcompp
  pop bx             ;|y x z

  mov dx,[bx+2]
  cmp dx,[bx+6]  ; if tf>tb { no_hit: early exit }
  jg B      ;si=100   ;|y x z

  loop I

;Reflect from the gem: reflect(i,n) = i - 2*n*(i'n)
  mov bx,[bx+si] ; pf
;  push si
Y:fld dword[bx+di]   ;|(rd'pf) rd.y .x .z  ; reads pf->dot_rd
  fmul dword[bx+si]  ;|(rd'pf)*pf.z rd.y .x .z
  fadd st0           ;|2*(rd'pf)*pf.z rd.y .x .z
  fsubr st3          ;|R.z=rd.z-2*(rd'pf)*pf.z rd.y .x .z
  sub si,di   ;100 104 108
  jpo Y     ;si=10C  ;|(R=i-2*n(i'n)).y R.x R.z rd.y .x .z
;  pop si

;Environment map: chessboard below, sky gradient above.
B:fist word[di]      ;|y x z
  test [di],sp  ; if y>=-0.5 { chessboard } else { sky }
  js E          ; the sky is just y (= y^2 after gamma)
  
  fidivr word[si]    ;|C/y x z (C = hit?18:12)
  fmul st1,st0
  fmul st2           ;|u=z*C/y v=x*C/y z

  fistp word[bp+di]
  sub al,[bp+di]
  fistp word[bp+di]
  xor al,[bp+di]  ; xortex@ax = (u-T) XOR v
  and al,9
  add al,5        ; tex = (xortex AND 0b1001) + 5  [5|6|13|14]
  shl ax,12
  mul word[di]
  mov [di],dx     ; pushed ax = tex*y/16
  
E:ret
